1 Raw data

data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")

# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned  
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)

print(data)

2 Continuous Phenotype

# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")

Summary statistics My Image

Histogram My Image

Density My Image

Box_Plot My Image

qq_Plot My Image

Shapiro-Wilk normality test My Image

2.1 Categorical Phenotype

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

data$Categorical <- ifelse(data$T4Cortisol >= 956, "H", 
                           ifelse(data$T4Cortisol <= 190.8, "L", "M"))

table(data$Categorical)
library(ggplot2)

# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))

# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
  geom_bar() +
  labs(title = "Histogram of T4Cortisol by Category",
       x = "Category",
       y = "Frequency") +
  theme_minimal()

# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
  labs(title = "Histogram of T4Cortisol with Color by Category",
       x = "T4 Cortisol",
       y = "Frequency",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_density(alpha = 0.3) +
  labs(title = "Density Plot of T4Cortisol with Color by Category",
       x = "T4Cortisol",
       y = "Density",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
  geom_density() +
  geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
  labs(title = "Density Plot of T4Cortisol with Vertical Lines",
       x = "T4Cortisol",
       y = "Density") +
  theme_minimal()

The animals were sorted in these three categories >H = Hight >M = Medium >L = Low

My Image

The individuals frequency distribution in theese categories are shown in the plots below

My Image My Image My Image My Image

2.2 Removing “outliers”

Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.

library(tidyverse)

data_no_out <- filter(data, T4Cortisol < 1250)

# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")

boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")

hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")

shapiro.test(data$T4Cortisol)

My Image My Image My Image My Image

3 Genetic Correlation

To assess the correlation between Cortisol phenotypes and Genomic Estimated Breeding Values (GEBVs), we opt for a linear regression instead of a standard correlation test. This decision is driven by the non-normal distribution of our Cortisol phenotypes, which violates the assumptions required for traditional correlation tests.

Linear regression offers a robust alternative as it does not necessitate normality for the dependent variable. By regressing GEBVs over Cortisol, we can model the relationship between these variables. Our aim is to estimate the regression coefficient, which serves as our correlation estimate.

Due to the violation of normality assumptions for the dependent variable (Cortisol), traditional correlation tests may not provide reliable results, particularly in assessing the significance of the correlation. Therefore, alternative approaches, such as linear regression, are preferred as they do not require the same assumptions about the distribution of the dependent variable. By using linear regression, we can still assess the relationship between Cortisol and GEBVs while accommodating the non-normality of Cortisol phenotypes.

The regression model can be represented as follows: \[ y = \beta_0 + \beta_1 \times GEBV_{\text{Milk}} + \epsilon \]

Where:

This approach enables us to quantify the relationship between Cortisol and GEBVs, addressing the non-normality of Cortisol phenotypes while allowing for formal hypothesis testing of the correlation’s significance.

3.1 Data preparation

The first data I received from Lucas had only 135 animals out of 260 with values the other 125 had only NA I shown this to Lucas Lucas wrote to Alisson Lucas sent me the missing animals I merged this two files

rm(list = ls())

# Load the necessary library
library(dplyr)
library(tidyverse)

cortisol_260 <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

#This is the first dataframe with information for 135 animals and 125 NA
GEBVs1 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora.csv")
#This is the second file with information for the 125 NA animals
GEBVs2 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/elora_missing_females_2404_06_11_2024.csv")
#This are de columns we can use because we know the meaning of the acronyms
GEBVs_to_use <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebv_names_lucas_06102024_BAG.csv")


sum(is.na(GEBVs1$MILK))
GEBVs1<- GEBVs1[which(is.na(GEBVs1[,"DHI_BARN_NAME"]) == F),]

sum(!is.na(GEBVs2$MILK))
GEBVs2<- GEBVs2[which(is.na(GEBVs2[,"DHI_BARN_NAME"]) == F),]

print(GEBVs1$DHI_BARN_NAME)
print(GEBVs2$DHI_BARN_NAME)

# Making the two dataframes with the same columns
# Remove elora_id and international_id from GEBVs1
GEBVs1 <- GEBVs1 %>% select(-elora_id, -international_id)

# Remove ANIMAL_ID from GEBVs2
GEBVs2 <- GEBVs2 %>% select(-ANIMAL_ID)

# Check if the two dataframes have the same columns
have_same_columns <- all(names(GEBVs1) == names(GEBVs2))

if (have_same_columns) {
  print("The dataframes have the same columns.")
} else {
  print("The dataframes do not have the same columns.")
}


# Check if the column names are in the same order
same_order <- identical(names(GEBVs1), names(GEBVs2))

if (same_order) {
  print("The columns are in the same order.")
} else {
  print("The columns are not in the same order.")
}

GEBVs_combined <- rbind(GEBVs1, GEBVs2)

# Sort the columns
sorted_cortisol_260 <- sort(cortisol_260$ID)
sorted_GEBVs_combined <- sort(GEBVs_combined$DHI_BARN_NAME)

# Check if the sorted columns have the same values
identical(sorted_cortisol_260, sorted_GEBVs_combined)

# Create a duplicate of the column 'DHI_BARN_NAME' and name it 'elora_id'
GEBVs_combined$elora_id <- GEBVs_combined$DHI_BARN_NAME

# Assuming GEBVs_combined is your data frame
GEBVs_combined <- GEBVs_combined %>%
  select(elora_id, DHI_BARN_NAME, everything())

write.csv(GEBVs_combined, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora_complete.csv")

# Merging the dataframe with Cortisol values, with the dataframe with GEBVs values
Merg_Cort_GEBVs <- merge(cortisol_260, GEBVs_combined, by.x = "ID", by.y = "elora_id")

write.csv(Merg_Cort_GEBVs, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, all_of(Columns_to_use))

# The data below has the the 55 GEBVs + Cortisol data + Birth Year
write.csv(data, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits.csv")

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date$Sampling_date <- as.Date(samp_date$Sampling_date, format = "%m/%d/%Y")

table(samp_date$Sampling_date)

samp_date <- select(samp_date, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date, everything())

# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")

ps. I double checked by hand the select and merge process against the original tables received and is everything ok.

3.2 Correlations - Linear Regression

We tested the minimum model, adding only Birth Year, but adding Birth Year and Sampling Date we got the best results.

3.2.1 Adding BIRTH_YEAR and SAMPLING DATE

The regression model added the BY and SAMPLING DATE is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + SAMPLING\_DATE + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(SAMPLING\_DATE\) is the cortisol sampling date for the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The SAMPLING_DATE variable is also converted to a factor to account for the categorical nature of sampling date.

# Convert BIRTH_YEAR to a factor and rename
data_final$BIRTH_YEAR <- as.factor(data_final$BIRTH_YEAR)

# Convert Sampling_data to a factor and rename
data_final$Sampling_date <- as.factor(data_final$Sampling_date)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 5:ncol(data_final)) {
  trait_name <- colnames(data_final)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data_final[[2]] ~ data_final[[i]] + data_final$BIRTH_YEAR + data_final$Sampling_date , data = data_final)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY_SAMP/regression_summary_all_traits_BY_SampDt.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR and SAMPLING_DATE

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
CO 0.3913617 0.3257757 5.967151 0 -11.1849879 0.0099861
BMR 0.3899307 0.3241904 5.931386 0 14.4612444 0.0135665
LP 0.3858330 0.3196512 5.829896 0 12.2619950 0.0330355
MILK 0.3850052 0.3187343 5.809559 0 0.0671160 0.0396652
PROT 0.3841834 0.3178238 5.789422 0 2.2166395 0.0476301
UT 0.3822859 0.3157218 5.743130 0 -12.1238483 0.0731558
CK 0.3801906 0.3134008 5.692345 0 12.7333709 0.1192726
HHE 0.3795579 0.3126999 5.677076 0 7.4003505 0.1388525
MSP 0.3793006 0.3124149 5.670876 0 -7.2388952 0.1478152
DA 0.3791391 0.3122360 5.666989 0 10.8037510 0.1537699
TU 0.3785722 0.3116080 5.653351 0 -5.2064639 0.1769405
MSL 0.3777098 0.3106526 5.632656 0 -8.6230634 0.2203509
BQ 0.3775224 0.3104450 5.628166 0 -7.7284891 0.2313766
IH 0.3772456 0.3101385 5.621542 0 -4.7354916 0.2488963
ST 0.3767980 0.3096426 5.610839 0 -9.9721644 0.2808121
LOC 0.3766890 0.3095218 5.608233 0 -7.2367609 0.2893509
FAT 0.3765584 0.3093772 5.605116 0 0.8520581 0.3000062
UD 0.3763515 0.3091480 5.600177 0 -9.3297651 0.3179533
CA 0.3757417 0.3084725 5.585641 0 6.0756898 0.3798799
MS 0.3755038 0.3082090 5.579979 0 -6.0375749 0.4085912
SCK 0.3754363 0.3081342 5.578372 0 -4.4161473 0.4173165
SCS 0.3754033 0.3080976 5.577587 0 3.9080681 0.4216769
IDD 0.3752677 0.3079474 5.574362 0 3.7029601 0.4403519
FOOT 0.3751830 0.3078536 5.572349 0 17.3309360 0.4526548
TL 0.3750803 0.3077398 5.569908 0 -6.6585204 0.4683140
MET 0.3749055 0.3075462 5.565755 0 -3.4014191 0.4970656
CTFS 0.3747600 0.3073850 5.562300 0 4.0557716 0.5233387
CONF 0.3746137 0.3072229 5.558828 0 -4.7896643 0.5523352
FE 0.3745078 0.3071056 5.556316 0 -2.9375586 0.5752627
HL 0.3743988 0.3069849 5.553731 0 3.4274416 0.6009102
FA 0.3742870 0.3068610 5.551080 0 -3.2821641 0.6298652
DD 0.3742836 0.3068573 5.551001 0 2.5402141 0.6307730
MOB 0.3742556 0.3068263 5.550337 0 3.0592146 0.6385442
FTP 0.3742359 0.3068045 5.549870 0 4.8583743 0.6441419
BCS 0.3741681 0.3067293 5.548263 0 2.6692352 0.6643615
HH 0.3740753 0.3066266 5.546066 0 2.0230249 0.6947785
DF 0.3740392 0.3065865 5.545209 0 2.3681806 0.7076996
FL 0.3739824 0.3065237 5.543865 0 -2.1711035 0.7294613
UF 0.3739605 0.3064994 5.543347 0 2.8880683 0.7384413
MDR 0.3738855 0.3064163 5.541571 0 1.8495373 0.7722558
WL 0.3738548 0.3063822 5.540843 0 1.2643028 0.7878801
AFS 0.3738293 0.3063540 5.540239 0 1.5814380 0.8018670
RUM 0.3738023 0.3063241 5.539601 0 -1.2574561 0.8179113
SH 0.3738015 0.3063233 5.539583 0 1.0342843 0.8184040
BD 0.3736958 0.3062061 5.537080 0 0.7613199 0.9071017
MR 0.3736957 0.3062060 5.537078 0 0.6291752 0.9071898
SU 0.3736871 0.3061965 5.536876 0 0.4745538 0.9186634
FEED 0.3736818 0.3061907 5.536751 0 0.3944746 0.9266753
DHL 0.3736774 0.3061858 5.536647 0 -0.5919776 0.9340753
DO 0.3736757 0.3061838 5.536604 0 0.5220723 0.9373261
DS 0.3736695 0.3061769 5.536458 0 0.3979132 0.9502668
CW 0.3736676 0.3061749 5.536414 0 0.3562368 0.9547808
ME 0.3736658 0.3061729 5.536370 0 -0.2401976 0.9599012
MT 0.3736595 0.3061659 5.536223 0 -0.0976386 0.9882642
DCA 0.3736593 0.3061657 5.536217 0 0.0802555 0.9906432
Fitting Birth_Year and Sampling_date to the model these are the traits with significant correlation (<0.15):
  • CO = Cystic ovaries
  • BMR = Body Maintenance Requirements
  • LP = Lactation persistency
  • MILK = Milk yield
  • PROT = Protein yield
  • UT = Udder Texture
  • CK = Clinical Ketosis
  • HHE = Heel Horn Erosion
  • MSP = Milking Speed

4 GENOTYPES

Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno

My Image

In this folder we found the following files:

I made a copy of this files in a folder called Raw_files:

/home/bambrozi/2_CORTISOL/RawFiles

This directory has two sub-directories:

5 PHENOTYPE file

library(data.table)

pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]

#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)

#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,] 

#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)

#The phenotype file should have three columns: FID, Animal_id, Phenotype
HO <- rep("HO", 252)

pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))

colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")

pheno_gwas$cdn_id <-  as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)

write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)

6 SNP MAP

Adjusting the SNP_map to .map

map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)

6.1 Generating the bfiles

system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
    genoplink.bed
  • genoplink.bim
  • genoplink.fam
  • genoplink.log
  • genoplink.nosex

7 Quality Control

We ran the code below to perfom the QC okok

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean

/home/local/bin/plink \
    --bfile ${DATA} \
    --cow \
    --allow-no-sex \
    --hwe 1e-5 \
    --maf 0.01 \
    --geno 0.1 \
    --mind 0.1 \
    --keep-allele-order \
    --make-bed \
    --out ${RESULT}
    

The server screen outcome is shown below. My Image

After the Quality Control we ended up with

8 KING

To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink. okok

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king

plink2 \
    --bfile ${DATA} \
    --cow \
    --make-king-table \
    --out ${RESULT}

This is the output screen on terminal:

My Image

The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.

My Image

Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga: okok

setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")

relatedness="result_king.kin0" ## change accordingly!!

library(data.table)

print(relatedness)
rel=fread(relatedness, h = T)
head(rel)

print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354  & IID1!=IID2)
print(dup)
nrow(dup)

So the code above will provide this output if there is no duplicated individual.

My Image

We do not have any duplicated individual

So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC

files:genoplink_clean

After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file

9 PCA

Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/PCA/pca_result

plink \
    --bfile ${DATA} \
    --keep-allele-order \
    --cow \
    --pca \
    --out ${RESULT}

The PCA output:

My Image

9.1 PCA Plot

After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot

setwd("/home/bambrozi/2_CORTISOL/PCA")

library(ggplot2) 
library(tidyverse)

##
# Visualize PCA results
###

# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)

## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)


# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()


# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
  geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) +  # Add labels for animal IDs
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()

My Image

10 GWAS on GCTA

Previously we have performed GWAS on GCTA:

10.1 GWAS - EXTREME PHENO - WITH BY and SD

10.1.1 Data preparation

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)


# Create an matrix with fixed effects with only those animals which also have phenotype and genotype
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]

# Now we are gona remove the intermediary animals from pheno object
pheno$Categorical <- ifelse(pheno$cortisol >= 956, "H", 
                           ifelse(pheno$cortisol <= 190.8, "L", "M"))
table(pheno$Categorical)
pheno <- pheno[pheno$Categorical != "M", ]
pheno <- pheno[, c("FID", "cdn_id", "cortisol")]

# Now we are going to remove from fixeff the animals which are not in pheno
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id,]

#Checking if match the animals and order
identical(fixeff$cdn_id, pheno$cdn_id)

#Creating a file with animals to keep in the genotype file, we will use it on Plink
to_keep_geno <- pheno[, c("FID", "cdn_id")]

write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt", quote = F, row.names = F, col.names = T)
write.table(pheno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt", quote = F, row.names = F, col.names = T)
write.table(to_keep_geno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt", quote = F, row.names = F, col.names = F)

We ended up with
H (Hight) = 34 animals
L (Low) = 37 animals Total = 71 animals

On Plink we will remove all individuals from genotype files that are classified as Medium, keeping only the High and Low

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
KEEP=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt

plink2 --bfile ${DATA} --keep ${KEEP} --chr-set 29 --make-bed --out ${RESULT}

10.1.2 GWAS on GCTA

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

After ran the GWAS above I got the following message from the GCTA:

Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable. You may have a try of the option –reml-no-constrain.

As we got this error message, we needed to solve this problem, and for that we used the whole sample size (252 individuals) to estimate the variance components, and after this, using this variance components from the whole sample we performed the ssGWAS with the subset of individuals (34 High + 37 Low = 71), but this was not possible in GCTA so we switched to another software (BLUPF90+)

11 BLUPF90+ GWAS

To run ssGWAS on Blupf90+ suite, we will need 4 different softwares:

The tutorial for preGSF90 and postGSF90 we can find in the link bellow https://nce.ads.uga.edu/wiki/doku.php?id=readme.pregsf90#gwas_options_postgsf90

According to the BLUPF90+ tutorial:

ssGWAS is an iterative procedure with 2 steps:
0) Quality control
1) prediction of GEBV with ssGBLUP
2) prediction of SNP marker effects based on the GEBV

11.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • preGSf90 executable file
  • postGSf90 executable file
  • Parameter file

      11.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Phenotype
      THIRD COLUMN = Fixed Effect 1
      FOURTH COLUMN = Fixed Effect 2

      First we are going to generate a Phenotype_Fixed_Effect file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      To get in one file these four columns we need the following code:

      okok

      pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
      data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
      ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
      
      
      # Create data_final$cdn_id by matching IDs with elora_id
      data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
      fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
      fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
      fixeff$FID <- "HO"
      fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]
      
      identical(fixeff$cdn_id, pheno$cdn_id)
      
      fixeff <- fixeff[, c("cdn_id", "BIRTH_YEAR", "Sampling_date")]
      pheno <- pheno[,c("cdn_id", "cortisol")]
      
      # Load necessary libraries
      library(dplyr)
      
      # Merge pheno and fixeff data frames
      merged_data <- pheno %>% 
        left_join(fixeff, by = "cdn_id")
      
      
      merged_data$iid <- ids_eq$iid[match(merged_data$cdn_id, ids_eq$cdn_id)]
      
      merged_data <- merged_data[, c("iid", "cortisol", "BIRTH_YEAR", "Sampling_date")]
      
      write.table(merged_data, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt", col.names = F, quote = F, row.names = F)

      The file should be saved as text file, with separation by space and no columns names.

      PS: if there are any NA, they sould be replaced by 9999

      /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt (this file has 252 individuals)

      11.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Sire ID
      THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      /home/bambrozi/2_CORTISOL/RawFiles/Pedigree okok

      # to replace comma for space in the .csv file with the equivalence among IDs
      # /home/bambrozi/2_CORTISOL/RawFiles/Pedigree
      sed 's/,/ /g' bruno_ped.iid.csv > bruno_ped_iid_blup.txt

      11.1.3 Genotype file

      First we are going to generate a Genotype file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      /home/bambrozi/2_CORTISOL/RawFiles/Genotypes

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

      Below we can find the file’s location from the code above: /home/bambrozi/2_CORTISOL/RawFiles/Genotypes/bruno_gntps.txt /home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/bruno_gntps_iid (this file has 252 individuals)

      11.1.4 Download the executable files

      Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
      • BlupF90+ = we will use to estimate the Variance components and GEBVs
      • renumF90 = we will use to renumerate the files
      • preGSf90 = we will use to perform the Quality control
      • postGSf90 = we will use for GWAS

      11.1.5 SNP MAP

      /home/bambrozi/2_CORTISOL/RawFiles/SNP_map okok

      mapfile <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/DGVsnpinfo.2404.ho", header = T)
      
      colnames(mapfile)
      
      colnames(mapfile) <- c("SNP_ID", "CHR", "POS")
      
      mapfile <- mapfile[,c("CHR", "POS", "SNP_ID")]
      
      write.table(mapfile, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/snpmap.txt", col.names = T, row.names = F, quote = F)

11.2 Variance component estimation

But, before running the GEBV we will first perform one additional step to “CLEAN” our genotypes. Actually BLUPF90 by default perform a data cleaning with pre set parameters, but we will change some default parameters an so perform this additional step.

The Parameter card for this step is the parameter bellow:

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renum_QC.par

okok


DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
1.0
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid
PED_DEPTH
0
(CO)VARIANCES
1.0
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
  • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
  • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
  • FIELDS_PASSED TO OUTPUT:
  • WEIGHT (S):
  • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
  • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
  • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
  • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
  • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
  • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
  • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
  • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
  • OPTION outcallrate: Save the call rate information on SNP markers in the file.
  • OPTION saveCleanSNPs: This option generates 4 new files. We assume snpfile as a marker file.
    • snpfile_clean = new SNP marker file.
    • snpfile_clean_XrefID = new cross-reference file.
    • snpfile_SNPs_removed = a list of removed markers.
    • snpfile_Animals_removed = a list of removed animals.
  • OPTION minfreq 0.01: Minimum allele frequency to retain the marker.
  • OPTION map_file snpmap.txt: This option will upload the SNP MAP
  • OPTION excludeCHR 30 31: This option will remove sexual chromosome that is the 30 and 31

To run any softwere from Blupf90 suit we will perform always in this way:

  1. Go to the server you wanna run this analysis, for instance, grand
ssh grand
  1. Now go to the directory you have created to run this analysis where that set of files are placed.
cd /home/bambrozi/2_CORTISOL/GWAS/BLUPF90
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will ask you the name of your parameter card, for this step is renum_QC.par.

The command above will generate couple files, among them renf90.par

We modified renf90.par in:
  • renf90_DataClean.par
  • renf90_VarCompEst.par

11.2.1 Quality control

The parameter card to perform the Quality Control is: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_DataClean.par

It will run using the software pre preGS90 to generate the Clean Genotype and SNP_MAP files after Quality Control. okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

11.2.2 VCE

The second parameter card used for Variance Components Estimation (VCE) is the following: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_VarCompEst.par

It will be run using the software pre blupf90+ to generate the VCE. okok

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid_clean
OPTION no_quality_control
OPTION method VCE
OPTION origID
OPTION missing 9999
OPTION se_covar_function H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
  • OPTION SNP_file bruno_gntps_iid_clean: we are going to inform the genotype file generated in the previous step (the Quality Control). Blup will create an file with the same name that the original genotype file, and add the sufix **_clean**
  • OPTION no_quality_control we need to set up this option because we performed Quality Control in the previous step and now we don’t need that Blupf90+ perform again. Blupf90+ by default perform quality control, so if we do not want, we need to specify.
  • OPTION method: VCE (Variance Component Estimation).
  • OPTION OrigID: this will keep the original ID informed.
  • OPTION missing 9999: you are informing that missing values will appear as 9999
  • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
    • H2_1: the name that your function will appear on the output files.
    • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
    • **_1_1**: this effect is in the 1st column.
    • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

In the parameter card above, we remove the option for Quality Control and added options for Variance Components Estimation, for Missing data, for origID and for heritability estimation, but the MOST IMPORTANT PART is we need to change the OPTION SNP_FILE, replacing the original genotype file, for the “clean” version generated in the previous step.

The Variance Components will be placed in the file: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/blupf90.log

blupf90.log
My Image

Now you should update you renf90_VarCompEst.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90_VarCompEst.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90_VarCompEst.par (CO) VARIANCE

run blupf90+ again with the updated renf90_VarCompEst.par

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

blupf90.log
My Image

Parameter card
My Image

Now that we have the Variance components we go for the next step:
  • Prediction of SNP marker effects based on the GEBV
  • GWAS for High and Low cortisol animals
To do this, I’ll:
  • Keep the Phenotype_FixEff file from the previous analysis with records for 252 individuals
  • generate the genotype file like bellow
  • Perform the QUALITY CONTROL for the new (71 samples) genotype file
  • Add the VCE from the 252 data set in the parameter card
  • Generate the GEBV
  • Run GWAS

11.2.3 Prediction of SNP marker effects based on the GEBV

11.2.3.1 GWAS for High and Low cortisol animals

Now we’ll run a new analysis using the Variance Components Estimation from the previous step to perform the GWAS.

To perform this first we need a a Genotype file only with the 71 animals (High=34 and Low=37)

11.2.3.1.1 Updating the files
11.2.3.1.1.1 Phenotype and Fixed Effects files

In this read_me file we performed a ssGWAS but keeping the phenotype and fixed effect information for all 252 individuals, only reducing the number of individuals for the Genotype file.

11.2.3.1.1.2 Genotype files

I used this command line bellow to remove the individuals with MEDIUM phenotypes. okok I ran this code in /home/bambrozi/2_CORTISOL/GWAS/BLUPF90

awk 'NR==FNR{ids[$1]; next} $1 in ids' fenofix.txt bruno_gntps_iid > bruno_gntps_iid_71

And the output I coppied to /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno

11.2.3.2 Running renum_QC.par

Run with renumf90

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renum_QC.par

okok

DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
77182
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid_71
PED_DEPTH
0
(CO)VARIANCES
28212
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
We modified renf90.par in 3 copies:
  • renf90_DataClean.par
  • renf90_ssGWAS1_Ginv.par
  • renf90_ssGWAS2_SNPeff.par

11.2.3.3 Running renf90_DataClean.par for Quality Control

To run the parameter card bellow we are going to use the software presGSf90 /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_DataClean.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

11.2.3.4 Running renf90_ssGWAS1_Ginv.par for Ginv estimation

May be necessary to run the command bellow on the server Setting the stack size to “unlimited” allows the program to allocate memory for these large structures without hitting stack limits. By removing stack size limits, BLUPF90 is less likely to encounter segmentation faults or memory allocation issues that arise when the stack space is insufficient for the computations needed.

ulimit -s unlimited

The parameter card bellow we are going to run using the software blupf90+:

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_ssGWAS1_Ginv.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION no_quality_control
OPTION origID
OPTION missing 9999
OPTION saveGInverse
OPTION saveA22Inverse
OPTION snp_p_value

11.2.3.5 Running renf90_ssGWAS2_SNPeff.par for GWAS

The parameter card bellow we are going to run using the software postGSf90: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/renf90_ssGWAS2_SNPeff.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot 
This code will generate several files, among them:
  • chrsnp_pval:
    • Column 1: trait
    • Column 2: effect
    • Column 3: -log10(p-value)
    • Column 4: SNP
    • Column 5: Chromosome
    • Column 6: Position in bp
  • Pft1e3.R: a r-code to generate the Manhattan plot in R using the chrsnp_pval

11.2.3.6 Manhattan Plots for BLUPF90 GWAS

11.2.3.6.1 Genome Independent Segment

To make the Manhattan Plot considering Genome Independent Segment we should run the code bellow. This code has part of the code in the file Pft1e3.R

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)


setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno")
# Read in and process data for Manhattan plot
yyy1 <- read.table("chrsnp_pval")
yyy <- yyy1[order(yyy1$V4), ]
zzz <- yyy[which(yyy$V1 == 1 & yyy$V2 == 3), ]
n <- nrow(zzz)
y <- zzz[, 4]
x <- zzz[, 3]
chr1 <- zzz[, 5]
chr <- NULL
pos <- NULL

for (i in unique(yyy$V5)) {
  zz <- yyy[yyy$V5 == i, ]
  key <- zz$V4
  medio <- round(nrow(zz) / 2, 0)
  z <- key[medio]
  pos <- c(pos, z)
}

chrn <- unique(yyy$V5)
one <- which(chr1 %% 4 == 0)
two <- which(chr1 %% 4 == 1)
three <- which(chr1 %% 4 == 2)
four <- which(chr1 %% 4 == 3)
chr[one] <- "darkgoldenrod"
chr[two] <- "darkorchid"
chr[three] <- "blue"
chr[four] <- "forestgreen"

# Create Manhattan plot with Bonferroni line and legend
png(file = "Pft1e3_manplot_with_bonf_ind_seg.png", width = 20000, height = 10000, res = 600)
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2)
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP p_value - Trait: 1 Effect: 3", xlab = "", ylab = "-log10(p-value)", pch = 20, xlim = c(1, n), ylim = c(0, max(x)), col = chr)

# Add Bonferroni line
abline(h = bonf, col = "red", lwd = 2, lty = 2)

# Add legend for Bonferroni line
legend("topright", legend = "Bonferroni correction for genome independent segments", col = "red", lwd = 2, lty = 2, cex = 1)

axis(1, at = pos, labels = chrn)
dev.off()

My Image

11.2.3.7 Get rsID

For additional analysis like Variant Effect Prediction (VEP) we need the rsID, to get the rsID we use the software SNPChimp which requires SNP_names, but the BLUPF90 output have only the Chromosome and Position of the SNPs, so we are going to perfome these two steps to get one file with t he significant SNPs + SNP_name + rsID

11.2.3.7.1 Step 01 = Bring the SNP name to GWAS output

For this analysis we have to build this new sheet bringing SNP ID from snpmap.txt

gwas = read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval")
colnames(gwas) <- c("V1", "V2", "LOG_P", "SNP", "CHR", "BP")
gwas <- filter(gwas, LOG_P >= bonf)

snpmap <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/snpmap.txt", header = T)

# Filter snpmap to only include rows that match the CHR and BP values in out_genes
filtered_snpmap <- snpmap[snpmap$CHR %in% gwas$CHR & snpmap$POS %in% gwas$BP, ]

# Merge the filtered snpmap with out_genes
gwas_snpname <- merge(gwas, filtered_snpmap[, c("CHR", "POS", "SNP_ID")], 
                   by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), 
                   all.x = TRUE)

gwas_snpname <- gwas_snpname[,c("CHR", "BP", "LOG_P", "SNP_ID")]

write.csv(gwas_snpname, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval_SNPid_ind_seg_sig_BLUPF90.csv")
X CHR BP LOG_P SNP_ID
1 11 19779915 4.828190 Hapmap55558-rs29013980
2 12 17477322 4.417576 BTB-00488482
3 14 15929822 5.467439 ARS-BFGL-NGS-82859
4 15 33066384 5.041271 BTB-00594449
5 15 57930281 4.514377 ARS-BFGL-NGS-30515
6 17 46487068 4.547766 ARS-BFGL-NGS-12510
7 17 47884497 6.622354 ARS-BFGL-NGS-87412
8 17 48800954 5.268940 ARS-BFGL-NGS-112149
9 2 118820218 4.420962 Hapmap53065-rs29026778
10 2 41602429 4.497977 BTB-00096979
11 2 41670655 4.377284 Hapmap48777-BTA-47434
12 20 60365668 4.603817 BTB-01341053
13 20 65351653 4.891206 ARS-BFGL-NGS-91119
14 21 4055731 4.493938 ARS-BFGL-NGS-112210
15 23 44822126 4.316434 ARS-BFGL-NGS-115605
16 24 857728 4.458018 Hapmap47669-BTA-59022
17 28 20016672 5.130557 ARS-BFGL-NGS-116552
18 28 35807366 4.695875 ARS-BFGL-NGS-71077
19 29 33039863 4.933140 ARS-BFGL-NGS-41631
20 3 110866523 4.815943 ARS-BFGL-NGS-118207
21 3 111526170 4.760351 ARS-BFGL-NGS-74948
22 3 111730561 4.390707 BTB-01641394
23 3 111751663 4.390707 ARS-BFGL-NGS-37809
24 3 111772736 5.088738 ARS-BFGL-NGS-25298
25 4 103979600 5.591006 ARS-BFGL-NGS-110705
26 4 107787202 5.375628 ARS-BFGL-NGS-45265
27 4 109568557 4.709882 Hapmap48062-BTA-72409
28 4 110053134 5.245598 UA-IFASA-2147
29 4 24239851 4.641241 Hapmap60681-rs29013301
30 4 25933587 4.352451 Hapmap50554-BTA-107048
31 4 26511448 4.537662 Hapmap59011-rs29027498
32 4 27021431 5.045447 Hapmap59743-rs29017061
33 4 27094376 5.485063 BTB-00170785
34 4 50755462 5.015905 ARS-BFGL-NGS-12139
35 4 7873471 4.610767 Hapmap60503-rs29018741
36 4 95650788 4.825103 Hapmap58854-rs29023486
37 6 36184467 4.758110 Hapmap23854-BTC-062412
38 7 110306791 4.609691 BTB-01148543
11.2.3.7.2 Step 02 = Bring the rsID to the file with SNP name.

After get the rsID from SNPCHIMP I produced this table with the code bellow:

rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/SNPchimp_result_2150988331.tsv", header = T)

merged <- merge(rsid, gwas_snpname, by.x ="SNP_name", by.y ="SNP_ID")

colnames(merged)

merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "LOG_P")]

colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "LOG_P")

write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/gwas_ind_seg_sig_SNPname_rsID.csv")
X SNP_name rsID CHR BP LOG_P
1 ARS-BFGL-NGS-110705 rs110079750 4 103979600 5.591006
2 ARS-BFGL-NGS-112149 rs109276211 17 48800954 5.268940
3 ARS-BFGL-NGS-112210 rs109631116 21 4055731 4.493938
4 ARS-BFGL-NGS-115605 rs42029843 23 44822126 4.316434
5 ARS-BFGL-NGS-116552 rs110428837 28 20016672 5.130557
6 ARS-BFGL-NGS-118207 rs110081798 3 110866523 4.815943
7 ARS-BFGL-NGS-12139 rs110160157 4 50755462 5.015905
8 ARS-BFGL-NGS-12510 rs110038841 17 46487068 4.547766
9 ARS-BFGL-NGS-25298 rs109868537 3 111772736 5.088738
10 ARS-BFGL-NGS-30515 rs110565206 15 57930281 4.514377
11 ARS-BFGL-NGS-37809 rs42751504 3 111751663 4.390707
12 ARS-BFGL-NGS-41631 rs110121846 29 33039863 4.933140
13 ARS-BFGL-NGS-45265 rs110935391 4 107787202 5.375628
14 ARS-BFGL-NGS-71077 rs109584097 28 35807366 4.695875
15 ARS-BFGL-NGS-74948 rs41585925 3 111526170 4.760351
16 ARS-BFGL-NGS-82859 rs110506037 14 15929822 5.467439
17 ARS-BFGL-NGS-87412 rs109273103 17 47884497 6.622354
18 ARS-BFGL-NGS-91119 rs109575643 20 65351653 4.891206
19 BTB-00096979 rs43305418 2 41602429 4.497977
20 BTB-00170785 rs43377276 4 27094376 5.485063
21 BTB-00488482 rs43691687 12 17477322 4.417576
22 BTB-00594449 rs41764450 15 33066384 5.041271
23 BTB-01148543 rs42305073 7 110306791 4.609691
24 BTB-01341053 rs42462935 20 60365668 4.603817
25 BTB-01641394 rs42752353 3 111730561 4.390707
26 Hapmap23854-BTC-062412 rs81154019 6 36184467 4.758110
27 Hapmap47669-BTA-59022 rs41645754 24 857728 4.458018
28 Hapmap48062-BTA-72409 rs41566051 4 109568557 4.709882
29 Hapmap48777-BTA-47434 rs41636137 2 41670655 4.377284
30 Hapmap50554-BTA-107048 rs41615935 4 25933587 4.352451
31 Hapmap53065-rs29026778 rs29026778 2 118820218 4.420962
32 Hapmap55558-rs29013980 rs29013980 11 19779915 4.828190
33 Hapmap58854-rs29023486 rs29023486 4 95650788 4.825103
34 Hapmap59011-rs29027498 rs29027498 4 26511448 4.537662
35 Hapmap59743-rs29017061 rs29017061 4 27021431 5.045447
36 Hapmap60503-rs29018741 rs29018741 4 7873471 4.610767
37 Hapmap60681-rs29013301 rs29013301 4 24239851 4.641241
38 UA-IFASA-2147 rs29012492 4 110053134 5.245598

11.3 BLUPF90+ WINDOWS

11.3.1 Running renf90_ssGWAS2_SNPeff_W_10.par for GWAS (WINDOWS)

The parameter card bellow we are going to run using the software postGSf90:

# BLUPF90 parameter file created by RENUMF90
DATAFILE
renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot
OPTION SNP_moving_average 10
OPTION windows_variance 10 
The parameter file above will generate the following files:
  • snp_sol
    • column 1: trait
    • column 2: effect
    • column 3: SNP
    • column 4: Chromosome
    • column 5: Position
    • column 6: SNP solution
    • column 7: weight
    • column 8: variance explained by n adjacents SNP (if OPTION windows_variance is used).
    • column 9: variance of the SNP solution (used to compute the p-value) (if OPTION snp_p_value is used)
  • snp_pred
    • contains allele frequencies + SNP effects
  • chrsnpvar
    • column 1: trait
    • column 2: effect
    • column 3: variance explained by n adjacents SNP
      • If windows size is 20, the proportion of variance assigned to SNP 1 is calculated from SNP 1 to 20, for SNP 2 it goes from 2 to 21
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • chrsnp_pval
    • column 1: trait
    • column 2: effect
    • column 3: -log10(p-value)
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position in bp
  • chrsnp
    • column 1: trait
    • column 2: effect
    • column 3: values of SNP effects to use in Manhattan plots → [abs(SNP_i)/SD(SNP)]
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • windows_segments
    • column 1: label
    • column 2: window size (number of SNP)
    • column 3: Start SNP number for the window
    • column 4: End SNP number for the window
    • column 5: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
  • windows_variance
    • column 1: trait
    • column 2: effect
    • column 3: Start SNP number or SNP name for the window
    • column 4: End SNP number or SNP name for the window
    • column 5: window size (number of SNP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
    • column 8: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 9: variance explained by n adjacents SNP
  • Vft1e3.R
  • Sft1e3.R
  • Pft1e3.R

Bellow we can see the SNPs that explain more than 0.5% of Genetic Variance

w_var <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar", header = F)
w_var <- filter(w_var, V3 > 0.5)
colnames(w_var) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")
snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpmap.txt_clean", header = T)

# Fazer o merge baseado em duas condições: CHR e POS
merged_data <- merge(w_var, snp_map, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
w_var <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]

write.csv(w_var, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpID_w05.csv")

rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/SNPchimp_result_1425506748.tsv", header = T)


merged <- merge(rsid, w_var, by.x ="SNP_name", by.y ="SNP_ID")

colnames(merged)

merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "Var")]

colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "Var")

write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpID_w05_rsid_snpvar.csv")
X SNP_name rsID CHR BP Var
1 ARS-BFGL-BAC-28665 rs111010562 24 28487771 0.6899732
2 ARS-BFGL-BAC-35548 rs110100182 2 115665427 1.2851806
3 ARS-BFGL-BAC-7444 rs110491621 13 18558540 0.6630789
4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.1689686
5 ARS-BFGL-NGS-107330 rs109766798 2 116018639 0.5958551
6 ARS-BFGL-NGS-25298 rs109868537 3 111772736 1.2101267
7 ARS-BFGL-NGS-2713 rs41761360 15 34054485 0.5549892
8 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.3488469
9 ARS-BFGL-NGS-3276 rs110634531 20 12087403 0.5722138
10 ARS-BFGL-NGS-37809 rs42751504 3 111751663 1.0607739
11 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.8178265
12 ARS-BFGL-NGS-44131 rs110100483 3 111806406 1.0252690
13 ARS-BFGL-NGS-5141 rs110705087 24 28417928 0.5945940
14 ARS-BFGL-NGS-5976 rs41763278 15 34144843 0.5490537
15 ARS-BFGL-NGS-6202 rs110385521 3 111833768 1.1671762
16 ARS-BFGL-NGS-78615 rs110959523 20 12111883 0.5689079
17 ARS-BFGL-NGS-85333 rs110742206 3 111933069 0.6077153
18 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.7547885
19 ARS-BFGL-NGS-98724 rs109709275 15 34109962 0.6252263
20 BTA-25900-no-rs rs41575397 13 18509812 0.6364128
21 BTA-49096-no-rs rs41578131 2 115695003 1.6071883
22 BTA-73915-no-rs rs41648979 5 6312610 0.5103844
23 BTA-91816-no-rs rs41596771 15 11100934 0.9739683
24 BTB-01421844 rs42544667 15 11144666 0.7131750
25 BTB-01421892 rs42544714 15 11185546 0.6463689
26 BTB-01421934 rs42545356 15 11207429 0.8898567
27 BTB-01422008 rs42545430 15 11236303 0.6832306
28 BTB-01434227 rs42557533 10 50554831 0.6660810
29 BTB-01485274 rs42609685 24 28540641 0.8983644
30 BTB-01608944 rs42723390 15 10974997 1.1206945
31 BTB-01641394 rs42752353 3 111730561 0.9480214
32 BTB-01646599 rs42761380 24 28604672 0.5894579
33 BTB-01813405 rs42924913 15 10879514 0.5556342
34 BTB-01830390 rs42938737 15 10936002 1.0480277
35 BTB-01948148 rs43056622 3 111677167 0.7467347
36 BTB-02063964 rs43172105 15 10906064 0.8788211
37 Hapmap41888-BTA-49091 rs41645223 2 115622067 0.8268392
38 Hapmap42062-BTA-109789 rs41621207 3 111708236 1.2415855
39 Hapmap49833-BTA-103929 rs41603335 13 18386942 0.5282867
40 Hapmap50266-BTA-13664 rs29018622 13 18266073 0.6006650
41 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7398417
42 Hapmap54981-rs29019846 rs29019846 24 28516684 0.7747524
43 Hapmap58887-rs29013502 rs29013502 24 28570245 0.7619213

11.3.1.1 Manhattan Plots for BLUPF90 Windows ssGWAS

# Set working directory
setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/")
getwd()

# Read and process data
yyy1 = read.table("chrsnpvar")
yyy  = yyy1[order(yyy1$V4),]
zzz  = yyy[ which(yyy$V1==1 & yyy$V2==3), ]
n    = nrow(zzz)
y    = zzz[,4]
x    = zzz[,3]
chr1 = zzz[,5]
chr  = NULL
pos  = NULL
for (i in unique(yyy$V5)) {
  zz     = yyy[yyy$V5==i,]
  key    = zz$V4 
  medio  = round(nrow(zz)/2,0)
  z      = key[medio]
  pos    = c(pos,z)
}

# Assign colors for chromosomes
chrn       = unique(yyy$V5)
one        = which(chr1%%4==0) 
two        = which(chr1%%4==1) 
three      = which(chr1%%4==2) 
four       = which(chr1%%4==3) 
chr[one]   = "darkgoldenrod"
chr[two]   = "darkorchid"
chr[three] = "blue"
chr[four]  = "forestgreen"

# Generate Manhattan plot and save to PNG
png(file = "Vft1e3_manplot_with_thresholds.png", 
    width = 20000, height = 10000, res = 600) # Configure width, height, and resolution

# Set plot parameters
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2, mar = c(5, 5, 4, 2))

# Create Manhattan plot
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP Variance explained by 10 adjacents SNP window", 
     xlab = "", ylab = "% variance expl", pch = 20, 
     xlim = c(1, n), ylim = c(0, max(x)), col = chr, cex.axis = 1.2)

# Add dashed lines for thresholds
abline(h = 0.5, col = "red", lwd = 2, lty = 2)  # Red dashed line at 0.5

# Add legend for thresholds
legend("topright", legend = c("Threshold 0.5%"), 
       col = "red", lwd = 2, lty = 2, cex = 1)

# Add chromosome labels on the X-axis
axis(1, at = pos, labels = chrn, las = 1, cex.axis = 0.8)

# Close the graphics device
dev.off()

My Image

11.3.2 Finding p-values for each SNP in the window

To bring the p-value for each snp in the window we need to use files from different approachs:
  • chrsnpvar:

    /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar

    • which contains the variance explained by each window. This file is from windows approach
  • chrsnp_pval:

    /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval

    • which contains the p-value for each snp. This file is not from windows approach. Is for the approach which considers each SNP individually (beside fit all SNPs at the same time)
chrsnpvar <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/chrsnpvar", header = F)
chrsnpvar <- filter(chrsnpvar, V3 > 0.5)
colnames(chrsnpvar) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")

# as the column 3: -log10(p-value) of this file doesn't say that this p-value is for the window, I'm assuming that this p-value is for individual SNP
chrsnp_pval <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/chrsnp_pval", header = F)
colnames(chrsnp_pval) <- c("trait", "effect", "-log10(p-value)", "SNP", "Chromosome", "Position in bp")

snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpmap.txt_clean", header = T)

#####

# Bringing p-value for each snp in the window

# Create an empty row with the same column names
empty_row <- chrsnpvar[1, ]
empty_row[,] <- NA  # Set all values to NA

# Create a new dataframe with gaps
expanded_df <- do.call(rbind, lapply(1:nrow(chrsnpvar), function(i) {
  rbind(chrsnpvar[i, ], empty_row[rep(1, 9), ])
}))

# Reset row names
rownames(expanded_df) <- NULL

# Ensure the SNP column is numeric
expanded_df$SNP <- as.numeric(expanded_df$SNP)

# Loop through the dataframe and fill empty SNP values with sequential numbers
for (i in seq_len(nrow(expanded_df))) {
  if (is.na(expanded_df$SNP[i])) {
    expanded_df$SNP[i] <- expanded_df$SNP[i - 1] + 1
  }
}


# Assuming your data frame is called df
expanded_df$index <- 1:nrow(expanded_df)
merged_pvalue_var <- merge(chrsnp_pval, expanded_df, by = "SNP")
merged_pvalue_var <- arrange(merged_pvalue_var, index)

merged_pvalue_var$p_value <- 10^(-merged_pvalue_var$`-log10(p-value)`)

merged_pvalue_var <- merge(merged_pvalue_var, snp_map, by.x = c("Chromosome", "Position in bp"), by.y = c("CHR", "POS"), all.x = TRUE)

merged_pvalue_var <- arrange(merged_pvalue_var, index)

pvalue_var_allsnps <- merged_pvalue_var[, c("index", "Chromosome", "Position in bp",
                                            "SNP", "SNP_ID", "-log10(p-value)",
                                            "p_value", "Var")]

write.csv(pvalue_var_allsnps, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/pval_for_each_snp_in_window.csv")

The p-value for each SNP in the window is bellow:

## Warning: package 'readxl' was built under R version 4.3.3
## Warning: package 'knitr' was built under R version 4.3.3
Chr Position in bp SNP SNP_ID -LOG10(p-value) p_value Var
2 115622067 4582 Hapmap41888-BTA-49091 0.4075785 0.3912204 0.80407084600000001
2 115665427 4583 ARS-BFGL-BAC-35548 2.1953425 0.0063776 NA
2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 NA
2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 115665427 4583 ARS-BFGL-BAC-35548 2.1953425 0.0063776 1.2237597850999999
2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 NA
2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 115695003 4584 BTA-49096-no-rs 0.0488671 0.8935789 1.5221060874000001
2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 NA
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 115730530 4585 ARS-BFGL-NGS-30337 2.5552443 0.0027846 1.2853203491
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 115821065 4586 ARS-BFGL-NGS-103753 1.8440522 0.0143202 1.0965188597
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
2 115875702 4587 Hapmap54770-rs29009608 1.8678731 0.0135559 0.70738053840000004
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
2 115986085 4588 ARS-BFGL-NGS-43721 2.7746365 0.0016802 0.78571400749999998
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 NA
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
2 116423023 4597 BTA-95472-no-rs 0.9326654 0.1167709 NA
2 116018639 4589 ARS-BFGL-NGS-107330 2.3463508 0.0045045 0.5887867363
2 116064090 4590 Hapmap38323-BTA-49132 2.3463508 0.0045045 NA
2 116123392 4591 ARS-BFGL-NGS-107909 1.1792513 0.0661833 NA
2 116152065 4592 ARS-BFGL-NGS-19012 1.1792513 0.0661833 NA
2 116186918 4593 BTA-103078-no-rs 1.1913189 0.0643696 NA
2 116211153 4594 ARS-BFGL-NGS-112195 0.1517643 0.7050757 NA
2 116340519 4595 Hapmap44637-BTA-17098 0.1117605 0.7731068 NA
2 116395928 4596 ARS-BFGL-NGS-33237 0.4679789 0.3404248 NA
2 116423023 4597 BTA-95472-no-rs 0.9326654 0.1167709 NA
2 116430317 4598 Hapmap54608-rs29020417 1.0790898 0.0833509 NA
3 111677167 6878 BTB-01948148 1.0946657 0.0804145 0.68715106829999995
3 111708236 6879 Hapmap42062-BTA-109789 1.2400438 0.0575382 NA
3 111730561 6880 BTB-01641394 4.1432148 0.0000719 NA
3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 111708236 6879 Hapmap42062-BTA-109789 1.2400438 0.0575382 1.0996405039999999
3 111730561 6880 BTB-01641394 4.1432148 0.0000719 NA
3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 111730561 6880 BTB-01641394 4.1432148 0.0000719 0.85417720109999995
3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 NA
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 111751663 6881 ARS-BFGL-NGS-37809 4.1432148 0.0000719 0.93515725350000001
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 111772736 6882 ARS-BFGL-NGS-25298 5.3052523 0.0000050 1.0283717422
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
3 111806406 6883 ARS-BFGL-NGS-44131 0.1289040 0.7431834 0.85768353340000003
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 NA
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
3 111833768 6884 ARS-BFGL-NGS-6202 3.2035027 0.0006259 0.98203344020000005
3 111933069 6885 ARS-BFGL-NGS-85333 4.3712593 0.0000425 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 NA
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
3 112345942 6893 ARS-BFGL-NGS-55894 0.2919687 0.5105418 NA
3 111965305 6886 ARS-BFGL-NGS-97849 1.9106701 0.0122837 0.61040931220000005
3 111991216 6887 ARS-BFGL-NGS-71345 1.9106701 0.0122837 NA
3 112064027 6888 ARS-BFGL-NGS-110683 2.3824353 0.0041454 NA
3 112192839 6889 BTA-87451-no-rs 1.9682118 0.0107594 NA
3 112221070 6890 ARS-BFGL-NGS-18163 1.6332310 0.0232685 NA
3 112257692 6891 ARS-BFGL-NGS-115246 0.3386287 0.4585338 NA
3 112313178 6892 ARS-BFGL-NGS-19581 0.8847032 0.1304058 NA
3 112345942 6893 ARS-BFGL-NGS-55894 0.2919687 0.5105418 NA
3 112427851 6894 ARS-BFGL-NGS-65128 0.1144700 0.7682985 NA
3 112662102 6895 BTA-117752-no-rs 1.4158696 0.0383822 NA
6 8630173 11046 BTB-01068898 3.4115993 0.0003876 0.55639127330000004
6 8727520 11047 Hapmap33379-BTA-153882 0.2360505 0.5806969 NA
6 8794456 11048 Hapmap44182-BTA-104972 0.2803710 0.5243594 NA
6 8881200 11049 ARS-BFGL-NGS-54017 2.2115780 0.0061436 NA
6 9049173 11050 BTB-01645485 0.0063533 0.9854774 NA
6 9142299 11051 BTB-01069210 0.1114894 0.7735895 NA
6 9166519 11052 BTB-01069292 1.9594612 0.0109784 NA
6 9189316 11053 BTB-02016906 1.9594612 0.0109784 NA
6 9216946 11054 BTB-01718894 2.5236403 0.0029947 NA
6 9272547 11055 BTB-01718868 1.5012275 0.0315335 NA
6 9142299 11051 BTB-01069210 0.1114894 0.7735895 0.51576907869999999
6 9166519 11052 BTB-01069292 1.9594612 0.0109784 NA
6 9189316 11053 BTB-02016906 1.9594612 0.0109784 NA
6 9216946 11054 BTB-01718894 2.5236403 0.0029947 NA
6 9272547 11055 BTB-01718868 1.5012275 0.0315335 NA
6 9387456 11056 BTB-01939106 0.0631806 0.8646083 NA
6 9418094 11057 BTB-01322180 0.2428379 0.5716920 NA
6 9453914 11058 BTB-01322092 0.5623756 0.2739204 NA
6 9483232 11059 BTA-10940-no-rs 0.7251282 0.1883093 NA
6 9504486 11060 BTB-01322000 0.7154267 0.1925632 NA
8 55218811 15704 Hapmap54541-rs29014049 1.2509697 0.0561087 0.69979568290000005
8 55298755 15705 BTA-107975-no-rs 0.2434081 0.5709418 NA
8 55323946 15706 BTB-01616745 0.5026370 0.3143135 NA
8 55376512 15707 ARS-BFGL-NGS-76732 3.7920659 0.0001614 NA
8 55433879 15708 BTB-02017947 2.7518384 0.0017708 NA
8 55458002 15709 Hapmap51811-BTA-119735 1.8587790 0.0138427 NA
8 55493961 15710 BTB-01258879 3.0361970 0.0009200 NA
8 55517877 15711 ARS-BFGL-NGS-37680 1.5463493 0.0284217 NA
8 55552514 15712 ARS-BFGL-NGS-74002 2.6498886 0.0022393 NA
8 55603363 15713 ARS-BFGL-NGS-108258 3.0361970 0.0009200 NA
9 10112014 16913 ARS-BFGL-NGS-57285 1.7786476 0.0166476 0.50607275890000003
9 10139748 16914 ARS-BFGL-NGS-37889 1.6229291 0.0238271 NA
9 10221793 16915 ARS-BFGL-NGS-93995 0.7484987 0.1784437 NA
9 10260399 16916 ARS-BFGL-NGS-15511 0.7484987 0.1784437 NA
9 10337737 16917 BTA-83229-no-rs 0.2705532 0.5363481 NA
9 10490301 16918 ARS-BFGL-NGS-110691 0.1495558 0.7086703 NA
9 10529588 16919 Hapmap36170-SCAFFOLD200128_32745 0.2984731 0.5029524 NA
9 10601391 16920 ARS-BFGL-NGS-10202 0.0295204 0.9342856 NA
9 10622057 16921 Hapmap38633-BTA-83140 0.0528145 0.8854937 NA
9 10724264 16922 ARS-BFGL-NGS-17513 0.2137440 0.6113022 NA
13 18266073 23616 Hapmap50266-BTA-13664 0.3810341 0.4158780 0.58429790559999994
13 18386942 23617 Hapmap49833-BTA-103929 0.4177720 0.3821448 NA
13 18489670 23618 ARS-BFGL-NGS-21967 0.3601825 0.4363324 NA
13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 NA
13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
13 18386942 23617 Hapmap49833-BTA-103929 0.4177720 0.3821448 0.50856768149999998
13 18489670 23618 ARS-BFGL-NGS-21967 0.3601825 0.4363324 NA
13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 NA
13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
13 18509812 23619 BTA-25900-no-rs 1.3093175 0.0490549 0.60180061750000002
13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 NA
13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
13 18870958 23627 ARS-BFGL-NGS-68568 0.3402232 0.4568534 NA
13 18892498 23628 ARS-BFGL-NGS-109707 1.8542890 0.0139866 NA
13 18558540 23620 ARS-BFGL-BAC-7444 1.2573005 0.0552967 0.60402666900000002
13 18626070 23621 ARS-BFGL-NGS-116274 1.2573005 0.0552967 NA
13 18676544 23622 BTA-31832-no-rs 1.1165798 0.0764575 NA
13 18720587 23623 Hapmap43233-BTA-31838 0.7767939 0.1671884 NA
13 18755905 23624 BTA-10553-rs29016106 0.8913772 0.1284171 NA
13 18792716 23625 ARS-BFGL-NGS-72609 1.2362833 0.0580386 NA
13 18822522 23626 ARS-BFGL-NGS-13518 1.3218013 0.0476649 NA
13 18870958 23627 ARS-BFGL-NGS-68568 0.3402232 0.4568534 NA
13 18892498 23628 ARS-BFGL-NGS-109707 1.8542890 0.0139866 NA
13 18952584 23629 ARS-BFGL-NGS-116613 1.2880171 0.0515208 NA
15 10974997 26385 BTB-01608944 1.3520801 0.0444549 0.51261892730000003
15 11100934 26386 BTA-91816-no-rs 0.4733557 0.3362360 NA
15 11144666 26387 BTB-01421844 2.0829468 0.0082614 NA
15 11185546 26388 BTB-01421892 0.4733557 0.3362360 NA
15 11207429 26389 BTB-01421934 1.0078060 0.0982187 NA
15 11236303 26390 BTB-01422008 1.0078060 0.0982187 NA
15 11310260 26391 BTA-91820-no-rs 1.0078060 0.0982187 NA
15 11451408 26392 Hapmap40037-BTA-100798 0.9018532 0.1253565 NA
15 11487557 26393 BTA-94770-no-rs 0.0029926 0.9931329 NA
15 11550646 26394 ARS-BFGL-NGS-27782 0.8776148 0.1325517 NA
16 6121692 27745 BTA-95363-no-rs 1.4433689 0.0360272 0.59678898970000005
16 6308221 27746 BTB-01975868 3.5022880 0.0003146 NA
16 6329170 27747 ARS-BFGL-NGS-89740 2.8227273 0.0015041 NA
16 6421207 27748 BTB-01200020 1.6187843 0.0240556 NA
16 6446024 27749 BTB-01200008 1.4569926 0.0349146 NA
16 6529002 27750 Hapmap52413-rs29016330 0.7722977 0.1689283 NA
16 6549899 27751 ARS-BFGL-NGS-13802 1.4433689 0.0360272 NA
16 6614897 27752 BTA-109095-no-rs 2.1726638 0.0067195 NA
16 6656521 27753 Hapmap51387-BTA-109077 2.1726638 0.0067195 NA
16 6781530 27754 Hapmap33722-BTA-155362 1.4427417 0.0360793 NA
23 14185501 36197 ARS-BFGL-NGS-34854 1.6567406 0.0220424 0.53129124309999998
23 14208471 36198 ARS-BFGL-NGS-100006 1.6567406 0.0220424 NA
23 14239531 36199 ARS-BFGL-NGS-29516 1.5126341 0.0307161 NA
23 14306746 36200 ARS-BFGL-NGS-78514 1.5126341 0.0307161 NA
23 14329178 36201 ARS-BFGL-NGS-113147 1.5217814 0.0300759 NA
23 14365537 36202 ARS-BFGL-NGS-105392 0.8997870 0.1259543 NA
23 14387143 36203 ARS-BFGL-NGS-59308 1.6397377 0.0229225 NA
23 14416017 36204 ARS-BFGL-NGS-8960 0.7575833 0.1747498 NA
23 14487014 36205 ARS-BFGL-NGS-115470 0.8312254 0.1474941 NA
23 14571804 36206 Hapmap30534-BTA-137081 2.5196142 0.0030226 NA
24 28487771 37343 ARS-BFGL-BAC-28665 0.5617269 0.2743298 0.56018426669999999
24 28516684 37344 Hapmap54981-rs29019846 0.8746587 0.1334570 NA
24 28540641 37345 BTB-01485274 1.7567714 0.0175077 NA
24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
24 28516684 37344 Hapmap54981-rs29019846 0.8746587 0.1334570 0.61500002899999995
24 28540641 37345 BTB-01485274 1.7567714 0.0175077 NA
24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
24 28540641 37345 BTB-01485274 1.7567714 0.0175077 0.71034974449999999
24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 NA
24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
24 28894159 37354 BTA-57730-no-rs 0.8767144 0.1328268 NA
24 28570245 37346 Hapmap58887-rs29013502 1.2778933 0.0527359 0.57945177729999997
24 28604672 37347 BTB-01646599 1.7567714 0.0175077 NA
24 28638911 37348 ARS-BFGL-NGS-41140 1.5164166 0.0304497 NA
24 28676366 37349 ARS-BFGL-NGS-35716 1.0514307 0.0888320 NA
24 28703207 37350 BTB-00884791 1.0514307 0.0888320 NA
24 28731501 37351 ARS-BFGL-NGS-118806 1.8097285 0.0154979 NA
24 28795202 37352 BTB-00883964 1.4618598 0.0345255 NA
24 28850850 37353 Hapmap54558-rs29009598 0.3031365 0.4975807 NA
24 28894159 37354 BTA-57730-no-rs 0.8767144 0.1328268 NA
24 28963905 37355 BTA-93775-no-rs 0.0204343 0.9540380 NA
27 16546003 39915 ARS-BFGL-NGS-97489 2.1349421 0.0073292 0.52664546609999996
27 16628800 39916 Hapmap4685-BTA-29671 2.9562453 0.0011060 NA
27 16663290 39917 BTB-00952622 4.3841922 0.0000413 NA
27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 NA
27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
27 16663290 39917 BTB-00952622 4.3841922 0.0000413 0.58911050239999996
27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 NA
27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
27 17083748 39925 BTB-00958271 0.5040644 0.3132821 NA
27 17109301 39926 Hapmap25499-BTA-123322 0.2758655 0.5298275 NA
27 16692516 39918 ARS-BFGL-NGS-58358 4.3841922 0.0000413 0.55759268129999995
27 16749902 39919 ARS-BFGL-NGS-34807 2.2355529 0.0058136 NA
27 16778234 39920 ARS-BFGL-NGS-13660 1.2061604 0.0622070 NA
27 16796562 39921 UA-IFASA-3305 0.1932988 0.6407686 NA
27 16818014 39922 ARS-BFGL-NGS-38547 1.2061604 0.0622070 NA
27 16946270 39923 ARS-BFGL-NGS-45781 2.1459210 0.0071463 NA
27 17051893 39924 ARS-BFGL-NGS-13285 1.2554968 0.0555269 NA
27 17083748 39925 BTB-00958271 0.5040644 0.3132821 NA
27 17109301 39926 Hapmap25499-BTA-123322 0.2758655 0.5298275 NA
27 17149375 39927 ARS-BFGL-NGS-55767 0.1452961 0.7156553 NA

11.3.3 0.5% Windows - BLUPF90+ GALLO

gwas <- read.csv("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/keep_all_pheno/windows_10/snpID_w05_rsid_snpvar.csv")
colnames(gwas)
colnames(gwas) <- c("X", "SNP", "rsID", "CHR", "BP", "Var")


# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/out_genes_w_05.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/out_qtl_w_05.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("SNP", "rsID", "CHR", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/keep_all_pheno/windows_10/out_qtl_w_05_clean.csv")

The GALLO output are bellow:

For GENES

X.1 X SNP rsID CHR BP Var chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 1 ARS-BFGL-BAC-28665 rs111010562 24 28487771 0.6899732 24 28485983 28486143 161 + ENSBTAG00000028575 U1 snRNA
2 42 Hapmap54981-rs29019846 rs29019846 24 28516684 0.7747524 24 28485983 28486143 161 + ENSBTAG00000028575 U1 snRNA
3 4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.1689686 2 115812583 115843935 31353 - ENSBTAG00000021325 SLC19A3 protein_coding
4 41 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7398417 2 115812583 115843935 31353 - ENSBTAG00000021325 SLC19A3 protein_coding
5 4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.1689686 2 115838391 115840022 1632 + ENSBTAG00000007127 NA protein_coding
6 41 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7398417 2 115838391 115840022 1632 + ENSBTAG00000007127 NA protein_coding
7 11 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.8178265 2 115948258 115951955 3698 + ENSBTAG00000021326 CCL20 protein_coding
8 11 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.8178265 2 115992779 116028253 35475 + ENSBTAG00000021327 DAW1 protein_coding
9 5 ARS-BFGL-NGS-107330 rs109766798 2 116018639 0.5958551 2 115992779 116028253 35475 + ENSBTAG00000021327 DAW1 protein_coding
10 37 Hapmap41888-BTA-49091 rs41645223 2 115622067 0.8268392 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
11 2 ARS-BFGL-BAC-35548 rs110100182 2 115665427 1.2851806 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
12 21 BTA-49096-no-rs rs41578131 2 115695003 1.6071883 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
13 8 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.3488469 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
14 8 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.3488469 2 115758046 115758105 60 + ENSBTAG00000054571 bta-mir-2285ao-1 miRNA
15 4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.1689686 2 115791504 115808606 17103 + ENSBTAG00000050771 NA lncRNA
16 40 Hapmap50266-BTA-13664 rs29018622 13 18266073 0.6006650 13 18226732 18280982 54251 - ENSBTAG00000016060 CREM protein_coding
17 39 Hapmap49833-BTA-103929 rs41603335 13 18386942 0.5282867 13 18321888 18393476 71589 + ENSBTAG00000007133 CUL2 protein_coding
18 20 BTA-25900-no-rs rs41575397 13 18509812 0.6364128 13 18449975 18466359 16385 + ENSBTAG00000052242 NA lncRNA
19 20 BTA-25900-no-rs rs41575397 13 18509812 0.6364128 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
20 3 ARS-BFGL-BAC-7444 rs110491621 13 18558540 0.6630789 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
21 35 BTB-01948148 rs43056622 3 111677167 0.7467347 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
22 38 Hapmap42062-BTA-109789 rs41621207 3 111708236 1.2415855 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
23 31 BTB-01641394 rs42752353 3 111730561 0.9480214 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
24 10 ARS-BFGL-NGS-37809 rs42751504 3 111751663 1.0607739 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
25 6 ARS-BFGL-NGS-25298 rs109868537 3 111772736 1.2101267 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
26 12 ARS-BFGL-NGS-44131 rs110100483 3 111806406 1.0252690 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
27 15 ARS-BFGL-NGS-6202 rs110385521 3 111833768 1.1671762 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
28 17 ARS-BFGL-NGS-85333 rs110742206 3 111933069 0.6077153 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
29 18 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.7547885 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
30 17 ARS-BFGL-NGS-85333 rs110742206 3 111933069 0.6077153 3 111927003 111927727 725 - ENSBTAG00000000335 HMGB4 protein_coding
31 18 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.7547885 3 111927003 111927727 725 - ENSBTAG00000000335 HMGB4 protein_coding
32 7 ARS-BFGL-NGS-2713 rs41761360 15 34054485 0.5549892 15 34027052 34225316 198265 + ENSBTAG00000001410 GRAMD1B protein_coding
33 19 ARS-BFGL-NGS-98724 rs109709275 15 34109962 0.6252263 15 34027052 34225316 198265 + ENSBTAG00000001410 GRAMD1B protein_coding
34 14 ARS-BFGL-NGS-5976 rs41763278 15 34144843 0.5490537 15 34027052 34225316 198265 + ENSBTAG00000001410 GRAMD1B protein_coding
35 22 BTA-73915-no-rs rs41648979 5 6312610 0.5103844 5 6174722 6273171 98450 + ENSBTAG00000021756 ZDHHC17 protein_coding
36 22 BTA-73915-no-rs rs41648979 5 6312610 0.5103844 5 6281008 6300975 19968 - ENSBTAG00000013406 CSRP2 protein_coding
37 28 BTB-01434227 rs42557533 10 50554831 0.6660810 10 50557150 50607672 50523 + ENSBTAG00000013493 BNIP2 protein_coding
38 28 BTB-01434227 rs42557533 10 50554831 0.6660810 10 50582585 50614948 32364 + ENSBTAG00000010298 GTF2A2 protein_coding
39 28 BTB-01434227 rs42557533 10 50554831 0.6660810 10 50584841 50584967 127 - ENSBTAG00000043816 NA snoRNA

FOR QTLs

X SNP rsID CHR QTL_type start_pos end_pos QTL_ID
1 ARS-BFGL-BAC-28665 rs111010562 24 Meat_and_Carcass 28473214 28473218 232857
2 Hapmap54981-rs29019846 rs29019846 24 Meat_and_Carcass 28473214 28473218 232857
3 BTB-01485274 rs42609685 24 Health 28570243 28570247 57038
4 Hapmap58887-rs29013502 rs29013502 24 Health 28570243 28570247 57038
5 BTB-01646599 rs42761380 24 Health 28570243 28570247 57038
6 BTB-01485274 rs42609685 24 Health 28570243 28570247 57040
7 Hapmap58887-rs29013502 rs29013502 24 Health 28570243 28570247 57040
8 BTB-01646599 rs42761380 24 Health 28570243 28570247 57040
9 BTB-01485274 rs42609685 24 Production 28570243 28570247 69281
10 Hapmap58887-rs29013502 rs29013502 24 Production 28570243 28570247 69281
11 BTB-01646599 rs42761380 24 Production 28570243 28570247 69281
12 Hapmap58887-rs29013502 rs29013502 24 Reproduction 28604670 28604674 138598
13 BTB-01646599 rs42761380 24 Reproduction 28604670 28604674 138598
14 ARS-BFGL-BAC-35548 rs110100182 2 Production 115695001 115695005 283322
15 BTA-49096-no-rs rs41578131 2 Production 115695001 115695005 283322
16 ARS-BFGL-NGS-30337 rs110485060 2 Production 115695001 115695005 283322
17 ARS-BFGL-NGS-103753 rs110842922 2 Milk 115820324 115820328 215425
18 ARS-BFGL-NGS-103753 rs110842922 2 Exterior 115839213 115839217 125900
19 Hapmap54770-rs29009608 rs29009608 2 Exterior 115839213 115839217 125900
20 Hapmap54770-rs29009608 rs29009608 2 Milk 115909335 115909339 155904
21 Hapmap54770-rs29009608 rs29009608 2 Milk 115909359 115909363 155914
22 ARS-BFGL-NGS-43721 rs108974471 2 Production 115986083 115986087 39013
23 ARS-BFGL-NGS-107330 rs109766798 2 Production 115986083 115986087 39013
24 ARS-BFGL-NGS-43721 rs108974471 2 Reproduction 115986083 115986087 39014
25 ARS-BFGL-NGS-107330 rs109766798 2 Reproduction 115986083 115986087 39014
26 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39015
27 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39015
28 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39016
29 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39016
30 ARS-BFGL-NGS-43721 rs108974471 2 Milk 115986083 115986087 39017
31 ARS-BFGL-NGS-107330 rs109766798 2 Milk 115986083 115986087 39017
32 ARS-BFGL-NGS-43721 rs108974471 2 Production 115986083 115986087 39018
33 ARS-BFGL-NGS-107330 rs109766798 2 Production 115986083 115986087 39018
34 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39019
35 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39019
36 ARS-BFGL-NGS-43721 rs108974471 2 Milk 115986083 115986087 39020
37 ARS-BFGL-NGS-107330 rs109766798 2 Milk 115986083 115986087 39020
38 ARS-BFGL-NGS-43721 rs108974471 2 Production 115986083 115986087 39021
39 ARS-BFGL-NGS-107330 rs109766798 2 Production 115986083 115986087 39021
40 ARS-BFGL-NGS-43721 rs108974471 2 Production 115986083 115986087 39022
41 ARS-BFGL-NGS-107330 rs109766798 2 Production 115986083 115986087 39022
42 ARS-BFGL-NGS-43721 rs108974471 2 Milk 115986083 115986087 39023
43 ARS-BFGL-NGS-107330 rs109766798 2 Milk 115986083 115986087 39023
44 ARS-BFGL-NGS-43721 rs108974471 2 Milk 115986083 115986087 39024
45 ARS-BFGL-NGS-107330 rs109766798 2 Milk 115986083 115986087 39024
46 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39025
47 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39025
48 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39026
49 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39026
50 ARS-BFGL-NGS-43721 rs108974471 2 Reproduction 115986083 115986087 39027
51 ARS-BFGL-NGS-107330 rs109766798 2 Reproduction 115986083 115986087 39027
52 ARS-BFGL-NGS-43721 rs108974471 2 Exterior 115986083 115986087 39028
53 ARS-BFGL-NGS-107330 rs109766798 2 Exterior 115986083 115986087 39028
54 Hapmap49833-BTA-103929 rs41603335 13 Milk 18340131 18340135 116582
55 Hapmap49833-BTA-103929 rs41603335 13 Milk 18349683 18349687 116735
56 Hapmap49833-BTA-103929 rs41603335 13 Milk 18370805 18370809 249726
57 Hapmap49833-BTA-103929 rs41603335 13 Milk 18381731 18381735 116525
58 Hapmap49833-BTA-103929 rs41603335 13 Reproduction 18386940 18386944 46808
59 Hapmap49833-BTA-103929 rs41603335 13 Reproduction 18386940 18386944 46809
60 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46810
61 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46811
62 Hapmap49833-BTA-103929 rs41603335 13 Production 18386940 18386944 46812
63 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46813
64 Hapmap49833-BTA-103929 rs41603335 13 Milk 18386940 18386944 46814
65 Hapmap49833-BTA-103929 rs41603335 13 Production 18386940 18386944 46815
66 Hapmap49833-BTA-103929 rs41603335 13 Production 18386940 18386944 46816
67 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46817
68 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46818
69 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46819
70 Hapmap49833-BTA-103929 rs41603335 13 Reproduction 18386940 18386944 46820
71 Hapmap49833-BTA-103929 rs41603335 13 Reproduction 18386940 18386944 46821
72 Hapmap49833-BTA-103929 rs41603335 13 Exterior 18386940 18386944 46822
73 Hapmap49833-BTA-103929 rs41603335 13 Meat_and_Carcass 18386940 18386944 151596
74 Hapmap49833-BTA-103929 rs41603335 13 Meat_and_Carcass 18392222 18392226 226401
75 Hapmap49833-BTA-103929 rs41603335 13 Meat_and_Carcass 18392222 18392226 228662
76 Hapmap49833-BTA-103929 rs41603335 13 Meat_and_Carcass 18392222 18392226 234093
77 Hapmap49833-BTA-103929 rs41603335 13 Milk 18406821 18406825 116526
78 BTA-25900-no-rs rs41575397 13 Reproduction 18489668 18489672 46823
79 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46824
80 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46825
81 BTA-25900-no-rs rs41575397 13 Milk 18489668 18489672 46826
82 BTA-25900-no-rs rs41575397 13 Production 18489668 18489672 46827
83 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46828
84 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46829
85 BTA-25900-no-rs rs41575397 13 Production 18489668 18489672 46830
86 BTA-25900-no-rs rs41575397 13 Production 18489668 18489672 46831
87 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46832
88 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46833
89 BTA-25900-no-rs rs41575397 13 Reproduction 18489668 18489672 46834
90 BTA-25900-no-rs rs41575397 13 Reproduction 18489668 18489672 46835
91 BTA-25900-no-rs rs41575397 13 Exterior 18489668 18489672 46836
92 BTA-25900-no-rs rs41575397 13 Reproduction 18509810 18509814 46837
93 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18509810 18509814 46837
94 BTA-25900-no-rs rs41575397 13 Reproduction 18509810 18509814 46838
95 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18509810 18509814 46838
96 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46839
97 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46839
98 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46840
99 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46840
100 BTA-25900-no-rs rs41575397 13 Production 18509810 18509814 46841
101 ARS-BFGL-BAC-7444 rs110491621 13 Production 18509810 18509814 46841
102 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46842
103 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46842
104 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46843
105 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46843
106 BTA-25900-no-rs rs41575397 13 Production 18509810 18509814 46844
107 ARS-BFGL-BAC-7444 rs110491621 13 Production 18509810 18509814 46844
108 BTA-25900-no-rs rs41575397 13 Production 18509810 18509814 46845
109 ARS-BFGL-BAC-7444 rs110491621 13 Production 18509810 18509814 46845
110 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46846
111 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46846
112 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46847
113 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46847
114 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46848
115 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46848
116 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46849
117 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46849
118 BTA-25900-no-rs rs41575397 13 Reproduction 18509810 18509814 46850
119 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18509810 18509814 46850
120 BTA-25900-no-rs rs41575397 13 Health 18509810 18509814 46851
121 ARS-BFGL-BAC-7444 rs110491621 13 Health 18509810 18509814 46851
122 BTA-25900-no-rs rs41575397 13 Reproduction 18509810 18509814 46852
123 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18509810 18509814 46852
124 BTA-25900-no-rs rs41575397 13 Exterior 18509810 18509814 46853
125 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18509810 18509814 46853
126 BTA-25900-no-rs rs41575397 13 Reproduction 18558538 18558542 46854
127 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18558538 18558542 46854
128 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46855
129 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46855
130 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46856
131 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46856
132 BTA-25900-no-rs rs41575397 13 Milk 18558538 18558542 46857
133 ARS-BFGL-BAC-7444 rs110491621 13 Milk 18558538 18558542 46857
134 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46858
135 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46858
136 BTA-25900-no-rs rs41575397 13 Milk 18558538 18558542 46859
137 ARS-BFGL-BAC-7444 rs110491621 13 Milk 18558538 18558542 46859
138 BTA-25900-no-rs rs41575397 13 Production 18558538 18558542 46860
139 ARS-BFGL-BAC-7444 rs110491621 13 Production 18558538 18558542 46860
140 BTA-25900-no-rs rs41575397 13 Production 18558538 18558542 46861
141 ARS-BFGL-BAC-7444 rs110491621 13 Production 18558538 18558542 46861
142 BTA-25900-no-rs rs41575397 13 Milk 18558538 18558542 46862
143 ARS-BFGL-BAC-7444 rs110491621 13 Milk 18558538 18558542 46862
144 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46863
145 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46863
146 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46864
147 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46864
148 BTA-25900-no-rs rs41575397 13 Reproduction 18558538 18558542 46865
149 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18558538 18558542 46865
150 BTA-25900-no-rs rs41575397 13 Reproduction 18558538 18558542 46866
151 ARS-BFGL-BAC-7444 rs110491621 13 Reproduction 18558538 18558542 46866
152 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46867
153 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46867
154 BTA-25900-no-rs rs41575397 13 Exterior 18558538 18558542 46868
155 ARS-BFGL-BAC-7444 rs110491621 13 Exterior 18558538 18558542 46868
156 BTB-01948148 rs43056622 3 Reproduction 111708234 111708238 30008
157 Hapmap42062-BTA-109789 rs41621207 3 Reproduction 111708234 111708238 30008
158 BTB-01641394 rs42752353 3 Reproduction 111708234 111708238 30008
159 ARS-BFGL-NGS-37809 rs42751504 3 Reproduction 111708234 111708238 30008
160 BTB-01948148 rs43056622 3 Reproduction 111708234 111708238 30244
161 Hapmap42062-BTA-109789 rs41621207 3 Reproduction 111708234 111708238 30244
162 BTB-01641394 rs42752353 3 Reproduction 111708234 111708238 30244
163 ARS-BFGL-NGS-37809 rs42751504 3 Reproduction 111708234 111708238 30244
164 BTB-01948148 rs43056622 3 Meat_and_Carcass 111708234 111708238 152258
165 Hapmap42062-BTA-109789 rs41621207 3 Meat_and_Carcass 111708234 111708238 152258
166 BTB-01641394 rs42752353 3 Meat_and_Carcass 111708234 111708238 152258
167 ARS-BFGL-NGS-37809 rs42751504 3 Meat_and_Carcass 111708234 111708238 152258
168 BTB-01948148 rs43056622 3 Meat_and_Carcass 111717521 111717525 225527
169 Hapmap42062-BTA-109789 rs41621207 3 Meat_and_Carcass 111717521 111717525 225527
170 BTB-01641394 rs42752353 3 Meat_and_Carcass 111717521 111717525 225527
171 ARS-BFGL-NGS-37809 rs42751504 3 Meat_and_Carcass 111717521 111717525 225527
172 ARS-BFGL-NGS-97849 rs110553601 3 Meat_and_Carcass 111991214 111991218 151880
173 ARS-BFGL-NGS-97849 rs110553601 3 Meat_and_Carcass 111991214 111991218 152096
174 BTB-02063964 rs43172105 15 Reproduction 10936000 10936004 47797
175 BTB-01830390 rs42938737 15 Reproduction 10936000 10936004 47797
176 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 47797
177 BTB-02063964 rs43172105 15 Reproduction 10936000 10936004 47798
178 BTB-01830390 rs42938737 15 Reproduction 10936000 10936004 47798
179 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 47798
180 BTB-02063964 rs43172105 15 Exterior 10936000 10936004 47799
181 BTB-01830390 rs42938737 15 Exterior 10936000 10936004 47799
182 BTB-01608944 rs42723390 15 Exterior 10936000 10936004 47799
183 BTB-02063964 rs43172105 15 Milk 10936000 10936004 47800
184 BTB-01830390 rs42938737 15 Milk 10936000 10936004 47800
185 BTB-01608944 rs42723390 15 Milk 10936000 10936004 47800
186 BTB-02063964 rs43172105 15 Milk 10936000 10936004 47801
187 BTB-01830390 rs42938737 15 Milk 10936000 10936004 47801
188 BTB-01608944 rs42723390 15 Milk 10936000 10936004 47801
189 BTB-02063964 rs43172105 15 Production 10936000 10936004 47802
190 BTB-01830390 rs42938737 15 Production 10936000 10936004 47802
191 BTB-01608944 rs42723390 15 Production 10936000 10936004 47802
192 BTB-02063964 rs43172105 15 Production 10936000 10936004 47803
193 BTB-01830390 rs42938737 15 Production 10936000 10936004 47803
194 BTB-01608944 rs42723390 15 Production 10936000 10936004 47803
195 BTB-02063964 rs43172105 15 Milk 10936000 10936004 47804
196 BTB-01830390 rs42938737 15 Milk 10936000 10936004 47804
197 BTB-01608944 rs42723390 15 Milk 10936000 10936004 47804
198 BTB-02063964 rs43172105 15 Milk 10936000 10936004 47805
199 BTB-01830390 rs42938737 15 Milk 10936000 10936004 47805
200 BTB-01608944 rs42723390 15 Milk 10936000 10936004 47805
201 BTB-02063964 rs43172105 15 Reproduction 10936000 10936004 47806
202 BTB-01830390 rs42938737 15 Reproduction 10936000 10936004 47806
203 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 47806
204 BTB-02063964 rs43172105 15 Health 10936000 10936004 47807
205 BTB-01830390 rs42938737 15 Health 10936000 10936004 47807
206 BTB-01608944 rs42723390 15 Health 10936000 10936004 47807
207 BTB-02063964 rs43172105 15 Reproduction 10936000 10936004 47808
208 BTB-01830390 rs42938737 15 Reproduction 10936000 10936004 47808
209 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 47808
210 BTB-02063964 rs43172105 15 Exterior 10936000 10936004 47809
211 BTB-01830390 rs42938737 15 Exterior 10936000 10936004 47809
212 BTB-01608944 rs42723390 15 Exterior 10936000 10936004 47809
213 BTB-02063964 rs43172105 15 Exterior 10936000 10936004 47810
214 BTB-01830390 rs42938737 15 Exterior 10936000 10936004 47810
215 BTB-01608944 rs42723390 15 Exterior 10936000 10936004 47810
216 BTB-02063964 rs43172105 15 Exterior 10936000 10936004 47811
217 BTB-01830390 rs42938737 15 Exterior 10936000 10936004 47811
218 BTB-01608944 rs42723390 15 Exterior 10936000 10936004 47811
219 BTB-02063964 rs43172105 15 Reproduction 10936000 10936004 281490
220 BTB-01830390 rs42938737 15 Reproduction 10936000 10936004 281490
221 BTB-01608944 rs42723390 15 Reproduction 10936000 10936004 281490
222 BTB-01421892 rs42544714 15 Meat_and_Carcass 11198692 11198696 226564
223 BTB-01421934 rs42545356 15 Meat_and_Carcass 11198692 11198696 226564
224 BTB-01422008 rs42545430 15 Meat_and_Carcass 11198692 11198696 226564
225 ARS-BFGL-NGS-2713 rs41761360 15 Meat_and_Carcass 34025602 34025606 152600
226 ARS-BFGL-NGS-98724 rs109709275 15 Reproduction 34139603 34139607 62361
227 ARS-BFGL-NGS-5976 rs41763278 15 Reproduction 34139603 34139607 62361
228 ARS-BFGL-NGS-98724 rs109709275 15 Reproduction 34139603 34139607 62407
229 ARS-BFGL-NGS-5976 rs41763278 15 Reproduction 34139603 34139607 62407
230 ARS-BFGL-NGS-98724 rs109709275 15 Reproduction 34139603 34139607 62411
231 ARS-BFGL-NGS-5976 rs41763278 15 Reproduction 34139603 34139607 62411
232 ARS-BFGL-NGS-5976 rs41763278 15 Milk 34161889 34161893 155975
233 ARS-BFGL-NGS-5976 rs41763278 15 Reproduction 34164294 34164298 62410
234 ARS-BFGL-NGS-3276 rs110634531 20 Health 12088254 12088258 179019
235 ARS-BFGL-NGS-78615 rs110959523 20 Health 12088254 12088258 179019
236 BTA-73915-no-rs rs41648979 5 Reproduction 6318394 6318398 212544
237 BTB-01434227 rs42557533 10 Health 50597586 50597590 156580

QTL Plots

# Set working directory
setwd("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10")

#QTL type plot
oldpar <- par(mar=c(1,15,0.5,1))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1.5)

#QTL names plot (by type)

# Set the width and height in pixels for 600 DPI
png("qtl_names_w05.png", width=5100, height=6600, res=600)  # Width and height in pixels
# Set layout for multiple plots
par(mfrow=c(6, 1), mar=c(2, 20, 1, 1))
# List of QTL classes for titles
qtl_classes <- c("Production", "Reproduction", "Milk", "Meat_and_Carcass", "Health", "Exterior")
# Loop through each QTL class and plot
for (qtl_class in qtl_classes) {
  plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class=qtl_class)
  title(main=qtl_class)  # Add the QTL class as the main title for each plot
}
# Close the device
dev.off()


#QTL enrichment analysis 

out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_name_w05.csv")


out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_type_w05.csv")


#Plots

#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL, out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL, out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL type My Image

QTL name by type My Image

11.3.3.1 Windows - QTL enrichment on GALLO

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
11 Foot angle 6 672 128 163224 0.0000169 0.0005655 Exterior
32 Rear leg placement - side view 5 430 128 163224 0.0000251 0.0005655 Exterior
6 Calving ease 12 3819 128 163224 0.0000513 0.0005773 Reproduction
31 Rear leg placement - rear view 5 491 128 163224 0.0000471 0.0005773 Exterior
28 Net merit 6 903 128 163224 0.0000863 0.0007764 Production
37 Stillbirth 7 1363 128 163224 0.0001097 0.0008227 Reproduction
9 Feet and leg conformation 5 627 128 163224 0.0001476 0.0009488 Exterior
44 Udder depth 5 695 128 163224 0.0002371 0.0013337 Exterior
30 PTA type 4 627 128 163224 0.0015793 0.0078963 Production
43 Udder attachment 4 655 128 163224 0.0018503 0.0083263 Exterior
25 Milk tridecylic acid content 3 319 128 163224 0.0021074 0.0084756 Milk
40 Teat placement - front 3 327 128 163224 0.0022602 0.0084756 Exterior
10 Fertility index 2 108 128 163224 0.0033389 0.0115577 Reproduction
12 Head width 1 6 128 163224 0.0046960 0.0150944 Production
17 Length of productive life 6 2004 128 163224 0.0051861 0.0155583 Production
33 Respiratory rate 1 9 128 163224 0.0070359 0.0197884 Health
34 Rump conformation 1 13 128 163224 0.0101471 0.0268600 Exterior
38 Strength 3 664 128 163224 0.0157176 0.0392939 Exterior
3 Body temperature 1 23 128 163224 0.0178830 0.0423545 Health
26 Muscle calcium content 1 40 128 163224 0.0308966 0.0695174 Meat and Carcass
27 Muscle sodium content 1 56 128 163224 0.0429884 0.0921180 Meat and Carcass
14 Interval from first to last insemination 2 445 128 163224 0.0481346 0.0984572 Reproduction
15 Interval to first estrus after calving 3 1053 128 163224 0.0505607 0.0989232 Reproduction
45 Udder height 2 504 128 163224 0.0599662 0.1124366 Exterior
16 Intramuscular fat 1 117 128 163224 0.0877305 0.1579149 Meat and Carcass
8 Fat thickness at the 12th rib 1 169 128 163224 0.1242283 0.2150105 Meat and Carcass
22 Milk glycosylated kappa-casein percentage 4 2527 128 163224 0.1380822 0.2301370 Milk
1 Age at first calving 1 233 128 163224 0.1671652 0.2686584 Reproduction
39 Teat length 1 300 128 163224 0.2098779 0.3148168 Exterior
41 Teat placement - rear 1 298 128 163224 0.2086349 0.3148168 Exterior
36 Somatic cell score 2 1122 128 163224 0.2199895 0.3193396 Health
18 M. paratuberculosis susceptibility 1 492 128 163224 0.3206093 0.4508568 Health
2 Body depth 1 616 128 163224 0.3837908 0.5233511 Production
19 Marbling score 2 1817 128 163224 0.4175991 0.5369131 Meat and Carcass
35 Shear force 3 2954 128 163224 0.4091307 0.5369131 Meat and Carcass
24 Milk protein yield 3 3093 128 163224 0.4380445 0.5475556 Milk
13 Inseminations per conception 1 790 128 163224 0.4627346 0.5627854 Reproduction
29 Pregnancy rate 1 944 128 163224 0.5241831 0.6207431 Reproduction
5 Bovine tuberculosis susceptibility 1 1155 128 163224 0.5972036 0.6890811 Health
4 Body weight 1 4289 128 163224 0.9669506 0.9713008 Production
7 Connective tissue amount 1 3142 128 163224 0.9170032 0.9713008 Meat and Carcass
20 Milk fat percentage 5 10941 128 163224 0.9357122 0.9713008 Milk
21 Milk fat yield 4 8220 128 163224 0.8906967 0.9713008 Milk
23 Milk protein percentage 3 8803 128 163224 0.9713008 0.9713008 Milk
42 Tenderness score 1 3483 128 163224 0.9368355 0.9713008 Meat and Carcass

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 41 9077 128 163224 0.0000000 0.0000000
2 Health 6 5889 128 163224 0.3161000 0.6049498
3 Meat and Carcass 11 18258 128 163224 0.8597556 1.0000000
4 Milk 22 75352 128 163224 1.0000000 1.0000000
5 Production 19 19640 128 163224 0.1968827 0.5906482
6 Reproduction 29 35008 128 163224 0.4032999 0.6049498

My Image

12 Heritability estimation - BLUPF90

12.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • Parameter file

      12.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Phenotype THIRD COLUMN = Fixed Effect 1 FOURTH COLUMN = Fixed Effect 2

      To get in one file these four columns we need the following code:

      #File with equivalence among different ids
      eq_ids <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
      
      # Genotype file with cid
      geno <- read.table("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
      
      # Phenotipic file and fixed effects
      data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")
      
      # creating a pheno file with only ID, Cortisol, BY and Sam date columns
      pheno <- data_final %>%
        select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column iid and and bring the iid from eq_ids to geno file
      pheno$iid <- eq_ids$iid[match(pheno$ID, eq_ids$elora_id)]
      
      # organizing columns sequence and keep only iid
      pheno <- pheno%>%
        select(iid, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column geno$iid, and bring the iid from eq_ids to geno file
      geno$iid <- eq_ids$iid[match(geno$V2, eq_ids$cdn_id)]
      
      # organizing the columns sequence
      library(dplyr)
      geno <- geno %>%
        select(V1, V2, iid, everything())
      
      # Keeping in the pheno file only the rows present also in geno file
      pheno <- pheno %>%
        filter(iid %in% geno$iid)
      
      write.table(pheno, "/home/bambrozi/2_CORTISOL/Heritability_BLUPF90/pheno_fix_eff.txt", sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE)

      The file should be saved as text file, with separation by space and no columns names.

      12.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Sire ID THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      # to replace comma for space in the .csv file with the equivalence among IDs
      sed -i 's/,/ /g' bruno_ids.csv

      12.1.3 Genotype file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

      12.1.4 Download the executable files

      Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
      • BlupF90+
      • renumF90

      12.1.5 Parameter file

      The appearance of this file is like this:

      My Image

      • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
      • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
      • FIELDS_PASSED TO OUTPUT:
      • WEIGHT (S):
      • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
      • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
      • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
      • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
      • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
      • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
      • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
      • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
      • OPTION method: VCE (Variance Component Estimation).
      • OPTION OrigID: this will keep the original ID informed.
      • OPTION missing 9999: you are informing that missing values will appear as 9999
      • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
        • H2_1: the name that your function will appear on the output files.
        • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
        • **_1_1**: this effect is in the 1st column.
        • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

12.2 Running renumF90 and BlupF90+

  1. Go to the server you wanna run this analysis, for instance, grand

  2. Now go to the directory you have created to run this analysis where that set of files are placed.

ssh grand
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will as you the name of your parameter card.

renumF90 will generate a new parameter card called renf90.par

  1. Run blupf90+
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the new one renf90.par

blupF90+ will generate the blupf90.log file with the results.

My Image

Now you should update you renf90.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90.par (CO) VARIANCE

  1. 2nd blupf90+ run
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the UPDATED renf90.par

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

12.3 Running renumF90 and BlupF90+ adding GENOTYPES

The previous analysis considered only the pedigree, but now we can insert the genotype information. To perform this you need a new diretory called Blup_Genomic inside your previously created directory.

Now you need add the reference for your genotype file in your previous parameter file renum.par and save in this new sub-directory.

The highlighted text show the added part.

My Image

Go to the sub-diretory

Note that as you are in the subdirectory, but your phenotype and fixed effect, pedigree and genotype files are still in the previous directory you need to add the highlighted part to inform the correct location

My Image

To run the renumf90 and blupf90+ you also need to add ../ to correct specify the location. Run renumF90

./../renumf90

Run blupf90+

./../blupf90+

The steps for run, update parameter card, re-run are the same.

12.4 Results

We have 2 different output files

  1. Variance components: blupf90.log

My Image

In this file we can find the heritabilit (SD) and for instance the convergence (similarity)

  1. Solutions: solutions.orig
    In this file we will find the solutions (results) for each effect

In our example:

  • EFFECT 1: Birth Year, has 4 levels (2018, 2019, 2020 and 2021), and the solution that for this fixed effect is how much each level add.
  • EFFECT 2: Sampling date, has 23 levels, and the solutions
  • EFFECT 3: Animal random effect, has one for each animal and it is the EBV or GEBV.

    My Image

12.5 Additional analysis

12.5.1 GEBVs Average and SD

I calculated the average and SD for the GEBVs

GEBVs <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/solutions.orig", header = T)

equiv_id <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")

mer <- merge(equiv_id, GEBVs, by.x = "iid", by.y = "original_id")

cortisol  <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

output <- merge(cortisol, mer, by.x = "ID", by.y = "elora_id")

colnames(output)

output <- output[, c("ID", "iid", "cdn_id", "T4Cortisol", "solution")]

mean_solution <- mean(output$solution, na.rm = TRUE)
sd_solution <- sd(output$solution, na.rm = TRUE)

mean_solution
sd_solution

library(ggplot2)

# Create histogram with density curve, mean, and SD lines
ggplot(output, aes(x = solution)) +
  geom_histogram(aes(y = ..density..), fill = "lightblue", color = "black", bins = 30, alpha = 0.6) +
  geom_density(color = "darkred", size = 1) +  # Density curve
  geom_vline(aes(xintercept = mean_solution), color = "red", linetype = "dashed", size = 1) +
  geom_vline(aes(xintercept = mean_solution + sd_solution), color = "blue", linetype = "dashed", size = 1) +
  geom_vline(aes(xintercept = mean_solution - sd_solution), color = "blue", linetype = "dashed", size = 1) +
  labs(title = "Histogram and Density Curve of GEBVs", x = "Solution", y = "Density") +
  theme_minimal()


# Subset for values BELOW 1 SD from the mean
output_low <- output %>% 
  filter(solution < (mean_solution - sd_solution))

# Subset for values ABOVE 1 SD from the mean
output_high <- output %>% 
  filter(solution > (mean_solution + sd_solution))

# Subset for values WITHIN 1 SD from the mean (intermediary)
output_intermediary <- output %>% 
  filter(solution >= (mean_solution - sd_solution) & solution <= (mean_solution + sd_solution))

write.csv(output, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/all.csv")
write.csv(output_low, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/low.csv")
write.csv(output_high, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/high.csv")
write.csv(output_intermediary, "/home/bambrozi/2_CORTISOL/GEBVs_subsets/intermediary.csv")

mean_solution <- round(mean(output$solution, na.rm = TRUE), 2)
sd_solution <- round(sd(output$solution, na.rm = TRUE), 2)


output$Category <- ifelse(output$T4Cortisol >= 956, "H", 
                            ifelse(output_low <= 190.8, "L", "M"))

# Create the plot
ggplot(output, aes(x = T4Cortisol, y = solution)) +
  geom_point(size = 2, color = "black") +  # Smaller scatter plot points
  labs(x = "T4 Cortisol (pg/mL)", y = "GEBV", title = "T4 Cortisol vs GEBV") +
  geom_hline(yintercept = mean_solution, color = "blue", linetype = "dashed", size = 1) +  # Mean line
  geom_hline(yintercept = mean_solution + sd_solution, color = "red", linetype = "dashed", size = 1) +  # +1SD line
  geom_hline(yintercept = mean_solution - sd_solution, color = "red", linetype = "dashed", size = 1) +  # -1SD line
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution, label = "Mean"), color = "blue", vjust = -0.5, hjust = 1) +  # Label for mean
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution + sd_solution, label = "+1SD"), color = "red", vjust = -0.5, hjust = 1) +  # Label for +1SD
  geom_text(aes(x = max(output$T4Cortisol), y = mean_solution - sd_solution, label = "-1SD"), color = "red", vjust = -0.5, hjust = 1) +  # Label for -1SD
  theme_minimal() +
  theme(
    axis.line = element_line(color = "black"), 
    panel.grid.major = element_line(color = "gray80", linewidth = 0.5), 
    panel.grid.minor = element_line(color = "gray90", linewidth = 0.25), 
    legend.title = element_text(size = 10),
    legend.position = "right"
  )

12.5.2 Age at sampling date

Merg_Cort_GEBVs <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, BIRTH_DATE, all_of(Columns_to_use))

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date2$Sampling_date <- as.Date(samp_date2$Sampling_date, format = "%m/%d/%Y")

table(samp_date2$Sampling_date)

samp_date <- select(samp_date2, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, BIRTH_DATE, Sampling_date)

library(lubridate)

data_final$BIRTH_DATE <- ymd(data_final$BIRTH_DATE)

# Calculate age in total days
data_final$Age_days <- as.numeric(difftime(data_final$Sampling_date, data_final$BIRTH_DATE, units = "days"))


# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/Data/data_age_samplingdate.csv")

# Calculate values
average_age <- mean(data_final$Age_days, na.rm = TRUE)
sd_age <- sd(data_final$Age_days, na.rm = TRUE)
median_age <- median(data_final$Age_days, na.rm = TRUE)
range_age <- range(data_final$Age_days, na.rm = TRUE)

# Create a data frame to save
age_summary <- data.frame(
  Metric = c("Mean", "Standard Deviation", "Median", "Min", "Max"),
  Value = c(average_age, sd_age, median_age, range_age[1], range_age[2])
)

# Save to a CSV file
write.csv(age_summary, "/home/bambrozi/2_CORTISOL/Data/age_summary.csv", row.names = FALSE)
Metric Value
Mean 244.86822
Standard Deviation 42.92195
Median 237.50000
Min 162.00000
Max 365.00000

12.5.3 Common enriched QTLs

qtl_pval <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_name_pvalue.csv")
qtl_w05 <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_name_w05.csv")
qtl_w01 <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_name_w01.csv")

library(tidyverse)

qtl_pval <- filter(qtl_pval, adj.pval <= 0.05) 
qtl_w05 <- filter(qtl_w05, adj.pval <= 0.05) 
qtl_w01 <- filter(qtl_w01, adj.pval <= 0.05) 

qtl_pval$approach <- "pval"
qtl_w05$approach <- "w05"
qtl_w01$approach <- "w01"

qtl_all_approachs <- rbind(qtl_pval, qtl_w05, qtl_w01)

write.csv(qtl_all_approachs, "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/enrich_sig_qtl_all_approachs.csv")

qtl_names <- unique(qtl_all_approachs$QTL)

qtl_names <- sort(qtl_names)


# Create a matrix with the desired dimensions
matrix_length <- length(qtl_names)
common_enrich_qtl <- matrix(NA, nrow = matrix_length, ncol = 4)

# Optionally, name the rows and columns
colnames(common_enrich_qtl) <- c("QTL_names", "Single_SNP", "Windows_0.5", "Windows_0.1")

common_enrich_qtl[, "QTL_names"] <- qtl_names

common_enrich_qtl <- as.data.frame(common_enrich_qtl)

# Ensure both vectors have the same length (or handle if they don't)
common_enrich_qtl$Single_SNP <- ifelse(common_enrich_qtl$QTL_names %in% qtl_pval$QTL, "yes", NA)
common_enrich_qtl$Windows_0.5 <- ifelse(common_enrich_qtl$QTL_names %in% qtl_w05$QTL, "yes", NA)
common_enrich_qtl$Windows_0.1 <- ifelse(common_enrich_qtl$QTL_names %in% qtl_w01$QTL, "yes", NA)

write.csv(common_enrich_qtl, "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/common_enrich_qtl.csv")